In [2]:
%matplotlib inline
In [3]:
import networkx as nx
print nx.__version__
First, let's define a base undirected acyclic graph. We're assuming these relationships are defined externally.
In [11]:
g = nx.Graph()
g.add_edge('otu1', 'int1', weight=0.1)
g.add_edge('otu2', 'int1', weight=0.2)
g.add_edge('otu3', 'int2', weight=0.5)
g.add_edge('otu4', 'int2', weight=0.7)
g.add_edge('int1', 'int2', weight=0.3)
In [12]:
nx.draw(g)
Next, let's setup our IDs and the corresponding counts for each sample.
In [13]:
otu_ids = ['otu1', 'otu2', 'otu3', 'otu4']
s1 = [1, 1, 0, 0]
s2 = [1, 1, 1, 0]
In [14]:
def _get_all_nodes(graph, tips):
"""Get all the nodes that connect the tips"""
nodes = set()
for i in range(len(tips) - 1):
nodes.update(set(nx.shortest_path(graph, source=tips[i], target=tips[i+1])))
return nodes
def unweighted_unifrac_uag(u_counts, v_counts, otu_ids, graph):
"""Compute unweighted unifrac over an undirected acyclic graph"""
u_ids = [i for u, i in zip(u_counts, otu_ids) if u]
v_ids = [i for v, i in zip(v_counts, otu_ids) if v]
u_nodes = _get_all_nodes(graph, u_ids)
v_nodes = _get_all_nodes(graph, v_ids)
u_sg = graph.subgraph(u_nodes)
v_sg = graph.subgraph(v_nodes)
unique_edges = set(u_sg.edges()).symmetric_difference(set(v_sg.edges()))
total_sg = graph.subgraph(u_nodes | v_nodes)
unique_sg = graph.subgraph(set(flatten(unique_edges)))
return unique_sg.size(weight='weight') / total_sg.size(weight='weight')
In [16]:
unweighted_unifrac_uag(s1, s2, otu_ids, g)
Out[16]:
In [ ]: